Load requred libraries, load data and move factor columns ahead.
library(readxl)
library(tidyverse)
library(car)
library(Hmisc)
library(multcomp)
library(gridExtra)
library(vegan)
library(ggpubr)
library(plotly)
library(DESeq2)
setwd('/home/alexey/BI/R/Project_3/')
download.file(url='https://archive.ics.uci.edu/ml/machine-learning-databases/00342/Data_Cortex_Nuclear.xls',
destfile='Data_Cortex_Nuclear.xls')
data <- read_xls('Data_Cortex_Nuclear.xls')
# Factors first
data <- data %>%
relocate(where(is.numeric), .after = where(is.character)) %>%
mutate(across(where(is.character), as.factor))
data[1:10,1:10]
## # A tibble: 10 x 10
## MouseID Genotype Treatment Behavior class DYRK1A_N ITSN1_N BDNF_N NR1_N
## <fct> <fct> <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl>
## 1 309_1 Control Memantine C/S c-CS… 0.504 0.747 0.430 2.82
## 2 309_2 Control Memantine C/S c-CS… 0.515 0.689 0.412 2.79
## 3 309_3 Control Memantine C/S c-CS… 0.509 0.730 0.418 2.69
## 4 309_4 Control Memantine C/S c-CS… 0.442 0.617 0.359 2.47
## 5 309_5 Control Memantine C/S c-CS… 0.435 0.617 0.359 2.37
## 6 309_6 Control Memantine C/S c-CS… 0.448 0.628 0.367 2.39
## 7 309_7 Control Memantine C/S c-CS… 0.428 0.574 0.343 2.33
## 8 309_8 Control Memantine C/S c-CS… 0.417 0.564 0.328 2.26
## 9 309_9 Control Memantine C/S c-CS… 0.386 0.538 0.318 2.13
## 10 309_10 Control Memantine C/S c-CS… 0.381 0.499 0.362 2.10
## # … with 1 more variable: NR2A_N <dbl>
So, what is our data?
res <- merge(data %>% group_by(class) %>% summarise(Total_entries = n(), .groups = "keep"),
na.omit(data) %>% group_by(class) %>% summarise(Entries_without_NA = n(), .groups = "keep"),
by = "class")
res$class <- as.character(res$class)
res %>% rbind(c('All classes', nrow(data), nrow(na.omit(data))))
## class Total_entries Entries_without_NA
## 1 c-CS-m 150 45
## 2 c-CS-s 135 75
## 3 c-SC-m 150 60
## 4 c-SC-s 135 75
## 5 t-CS-m 135 90
## 6 t-CS-s 105 75
## 7 t-SC-m 135 60
## 8 t-SC-s 135 72
## 9 All classes 1080 552
So, roughly half of our data contain one or more NAs in protein levels; every class have from 135 to 150 mice, including 45 to 90 mice without any NA in 77 proteins.
For this question, we should use one-way variance analysis. Filter data from NA and see at our abundances.
aov_data <- data %>% dplyr::select(class, BDNF_N) %>% na.omit
aov_data %>% group_by(class) %>% summarise(n = n(), .groups = "keep")
## # A tibble: 8 x 2
## # Groups: class [8]
## class n
## <fct> <int>
## 1 c-CS-m 150
## 2 c-CS-s 135
## 3 c-SC-m 150
## 4 c-SC-s 135
## 5 t-CS-m 135
## 6 t-CS-s 105
## 7 t-SC-m 135
## 8 t-SC-s 132
105 and 150 looks non-equal, but we don’t have strict criteria for number of elements, ant it is not more than 1.5 times, so, go ahead with this data.
aov_fit <- aov(BDNF_N ~ class, data = aov_data)
summary(aov_fit)
## Df Sum Sq Mean Sq F value Pr(>F)
## class 7 0.2878 0.04112 18.82 <2e-16 ***
## Residuals 1069 2.3362 0.00219
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Also check, can we use variance analysis at all. Check normality of residues and Cook’s distance
fortified_data <- fortify(aov_fit)
ggplot(fortified_data, aes(x = 1:nrow(fortified_data), y = .cooksd)) + geom_bar(stat = "identity") + ggtitle("Cook's distance plot")
ggplot(fortified_data, aes(x = class, y = .stdresid)) + geom_boxplot() + ggtitle("Residuals plot")
qqPlot(aov_fit, id = FALSE, main = "Residuals plot")
Ok, Cook’s distance is lower, than 0.015 (we havn’t outliers), and residuals distributed normally. We can use aov.
Ok, there is significant difference in BDNF_N level in different classes. In which pairs exactly they are? Perform post-hoc tests:
post_hoch <- glht(aov_fit, linfct = mcp(class = "Tukey"))
summary(post_hoch)
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: aov(formula = BDNF_N ~ class, data = aov_data)
##
## Linear Hypotheses:
## Estimate Std. Error t value Pr(>|t|)
## c-CS-s - c-CS-m == 0 0.0030979 0.0055459 0.559 0.99930
## c-SC-m - c-CS-m == 0 -0.0482717 0.0053980 -8.942 < 0.001 ***
## c-SC-s - c-CS-m == 0 -0.0258249 0.0055459 -4.657 < 0.001 ***
## t-CS-m - c-CS-m == 0 -0.0264852 0.0055459 -4.776 < 0.001 ***
## t-CS-s - c-CS-m == 0 -0.0337570 0.0059483 -5.675 < 0.001 ***
## t-SC-m - c-CS-m == 0 -0.0181541 0.0055459 -3.273 0.02420 *
## t-SC-s - c-CS-m == 0 -0.0136310 0.0055790 -2.443 0.22105
## c-SC-m - c-CS-s == 0 -0.0513696 0.0055459 -9.263 < 0.001 ***
## c-SC-s - c-CS-s == 0 -0.0289228 0.0056900 -5.083 < 0.001 ***
## t-CS-m - c-CS-s == 0 -0.0295831 0.0056900 -5.199 < 0.001 ***
## t-CS-s - c-CS-s == 0 -0.0368549 0.0060829 -6.059 < 0.001 ***
## t-SC-m - c-CS-s == 0 -0.0212520 0.0056900 -3.735 0.00486 **
## t-SC-s - c-CS-s == 0 -0.0167289 0.0057223 -2.923 0.06872 .
## c-SC-s - c-SC-m == 0 0.0224468 0.0055459 4.047 0.00147 **
## t-CS-m - c-SC-m == 0 0.0217865 0.0055459 3.928 0.00222 **
## t-CS-s - c-SC-m == 0 0.0145147 0.0059483 2.440 0.22245
## t-SC-m - c-SC-m == 0 0.0301176 0.0055459 5.431 < 0.001 ***
## t-SC-s - c-SC-m == 0 0.0346406 0.0055790 6.209 < 0.001 ***
## t-CS-m - c-SC-s == 0 -0.0006603 0.0056900 -0.116 1.00000
## t-CS-s - c-SC-s == 0 -0.0079321 0.0060829 -1.304 0.89721
## t-SC-m - c-SC-s == 0 0.0076708 0.0056900 1.348 0.87975
## t-SC-s - c-SC-s == 0 0.0121939 0.0057223 2.131 0.39481
## t-CS-s - t-CS-m == 0 -0.0072718 0.0060829 -1.195 0.93310
## t-SC-m - t-CS-m == 0 0.0083311 0.0056900 1.464 0.82588
## t-SC-s - t-CS-m == 0 0.0128542 0.0057223 2.246 0.32408
## t-SC-m - t-CS-s == 0 0.0156029 0.0060829 2.565 0.17016
## t-SC-s - t-CS-s == 0 0.0201260 0.0061130 3.292 0.02269 *
## t-SC-s - t-SC-m == 0 0.0045231 0.0057223 0.790 0.99360
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
And sure, draw the plot
ggplot(aov_data, aes(x = class, y = BDNF_N)) + geom_boxplot() + theme(axis.title.x=element_blank())
Make full model for all proteins in dataset. Remove all NA-values, and make standartisation.
proteins <- data %>% keep(is.numeric)
proteins_scale <- as.data.frame(sapply(proteins, scale)) %>% na.omit()
formula <- as.formula(ERBB4_N ~ .)
fit <- lm(formula, data = proteins_scale)
summary(fit)
##
## Call:
## lm(formula = formula, data = proteins_scale)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5286 -0.2236 -0.0084 0.2038 2.1204
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.018686 0.030373 -0.615 0.538703
## DYRK1A_N -0.112916 0.123677 -0.913 0.361710
## ITSN1_N 0.135706 0.176665 0.768 0.442776
## BDNF_N 0.125598 0.062889 1.997 0.046378 *
## NR1_N -0.324746 0.154046 -2.108 0.035544 *
## NR2A_N -0.006494 0.084269 -0.077 0.938610
## pAKT_N 0.219939 0.084104 2.615 0.009203 **
## pBRAF_N -0.073621 0.055001 -1.339 0.181356
## pCAMKII_N -0.001247 0.068353 -0.018 0.985451
## pCREB_N -0.130058 0.062926 -2.067 0.039290 *
## pELK_N 0.001466 0.031915 0.046 0.963391
## pERK_N -0.455159 0.180720 -2.519 0.012109 *
## pJNK_N -0.208138 0.087537 -2.378 0.017814 *
## PKCA_N 0.029035 0.094839 0.306 0.759625
## pMEK_N 0.026029 0.082902 0.314 0.753676
## pNR1_N -0.200848 0.104398 -1.924 0.054968 .
## pNR2A_N 0.125305 0.090307 1.388 0.165927
## pNR2B_N 0.346375 0.119623 2.896 0.003959 **
## pPKCAB_N 0.183064 0.088648 2.065 0.039457 *
## pRSK_N 0.044267 0.063137 0.701 0.483573
## AKT_N -0.005697 0.069193 -0.082 0.934413
## BRAF_N 0.222600 0.174199 1.278 0.201926
## CAMKII_N -0.011911 0.069865 -0.170 0.864700
## CREB_N -0.025378 0.054087 -0.469 0.639135
## ELK_N 0.078202 0.082869 0.944 0.345811
## ERK_N 0.202047 0.121582 1.662 0.097208 .
## GSK3B_N -0.094835 0.110826 -0.856 0.392588
## JNK_N -0.026715 0.075987 -0.352 0.725317
## MEK_N 0.023943 0.059893 0.400 0.689510
## TRKA_N 0.045878 0.131966 0.348 0.728257
## RSK_N -0.042932 0.073088 -0.587 0.557214
## APP_N -0.021694 0.074076 -0.293 0.769751
## Bcatenin_N 0.032010 0.153167 0.209 0.834546
## SOD1_N -0.116180 0.074190 -1.566 0.118018
## MTOR_N 0.180821 0.071743 2.520 0.012048 *
## P38_N -0.095941 0.079871 -1.201 0.230267
## pMTOR_N -0.077431 0.062293 -1.243 0.214475
## DSCR1_N -0.045716 0.068264 -0.670 0.503374
## AMPKA_N 0.104715 0.095270 1.099 0.272263
## NR2B_N 0.043254 0.053527 0.808 0.419449
## pNUMB_N -0.027045 0.083900 -0.322 0.747326
## RAPTOR_N 0.170702 0.088604 1.927 0.054629 .
## TIAM1_N -0.146606 0.087340 -1.679 0.093892 .
## pP70S6_N 0.028382 0.054994 0.516 0.606022
## NUMB_N -0.064220 0.073553 -0.873 0.383042
## P70S6_N -0.049965 0.058704 -0.851 0.395118
## pGSK3B_N 0.165450 0.052053 3.179 0.001577 **
## pPKCG_N -0.293093 0.075289 -3.893 0.000113 ***
## CDK5_N 0.001764 0.024323 0.073 0.942225
## S6_N 0.142854 0.069736 2.049 0.041059 *
## ADARB1_N -0.062631 0.048972 -1.279 0.201557
## AcetylH3K9_N 0.025985 0.135676 0.192 0.848199
## RRP1_N -0.031502 0.021968 -1.434 0.152232
## BAX_N -0.162854 0.044251 -3.680 0.000260 ***
## ARC_N 0.159773 0.063924 2.499 0.012775 *
## nNOS_N -0.007853 0.047195 -0.166 0.867918
## Tau_N 0.324281 0.082865 3.913 0.000104 ***
## GFAP_N -0.041293 0.047418 -0.871 0.384291
## GluR3_N -0.016512 0.056395 -0.293 0.769812
## GluR4_N -0.125414 0.060173 -2.084 0.037672 *
## IL1B_N 0.154896 0.058877 2.631 0.008794 **
## P3525_N 0.099458 0.048249 2.061 0.039810 *
## pCASP9_N 0.133665 0.050126 2.667 0.007923 **
## PSD95_N 0.401534 0.064325 6.242 9.55e-10 ***
## SNCA_N -0.018225 0.055822 -0.326 0.744195
## Ubiquitin_N 0.026834 0.056238 0.477 0.633468
## pGSK3B_Tyr216_N 0.175747 0.062933 2.793 0.005439 **
## SHH_N 0.094333 0.038435 2.454 0.014471 *
## BAD_N -0.070404 0.068748 -1.024 0.306311
## BCL2_N 0.011231 0.056615 0.198 0.842840
## pS6_N NA NA NA NA
## pCFOS_N -0.019510 0.049260 -0.396 0.692240
## SYP_N 0.059670 0.047577 1.254 0.210395
## H3AcK18_N 0.009463 0.099598 0.095 0.924349
## EGR1_N -0.016129 0.070213 -0.230 0.818406
## H3MeK4_N 0.045206 0.079340 0.570 0.569097
## CaNA_N -0.036804 0.071937 -0.512 0.609159
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3905 on 476 degrees of freedom
## Multiple R-squared: 0.8641, Adjusted R-squared: 0.8427
## F-statistic: 40.37 on 75 and 476 DF, p-value: < 2.2e-16
NAs for pS6_N protein is strange - perhaps, this one aliased by another predictor?
attributes(alias(fit)$Complete)$dimnames[[1]]
## [1] "pS6_N"
apply(proteins_scale, 2, function(col)cor(col, proteins_scale$pS6_N))
## DYRK1A_N ITSN1_N BDNF_N NR1_N NR2A_N
## -0.363711184 -0.223362718 0.242241256 0.404419650 0.372721618
## pAKT_N pBRAF_N pCAMKII_N pCREB_N pELK_N
## 0.434484415 0.355164671 0.475099641 0.440425689 -0.034131024
## pERK_N pJNK_N PKCA_N pMEK_N pNR1_N
## -0.388125679 0.484425623 0.033929900 0.490697989 0.447917932
## pNR2A_N pNR2B_N pPKCAB_N pRSK_N AKT_N
## 0.625902235 0.494241838 -0.300561178 0.008747293 0.624537450
## BRAF_N CAMKII_N CREB_N ELK_N ERK_N
## -0.379818814 0.426300117 0.260584566 0.358499140 0.274766842
## GSK3B_N JNK_N MEK_N TRKA_N RSK_N
## -0.167603930 0.218392878 0.418963882 0.279564901 0.224140611
## APP_N Bcatenin_N SOD1_N MTOR_N P38_N
## 0.072285884 0.431648030 0.615216495 0.428779533 0.476743809
## pMTOR_N DSCR1_N AMPKA_N NR2B_N pNUMB_N
## 0.568276940 0.282901796 0.375464887 0.500962579 -0.059493121
## RAPTOR_N TIAM1_N pP70S6_N NUMB_N P70S6_N
## 0.328641375 0.296692236 -0.028621124 0.115344538 0.400447005
## pGSK3B_N pPKCG_N CDK5_N S6_N ADARB1_N
## -0.100835693 -0.181076929 0.034639173 -0.149459188 0.218258161
## AcetylH3K9_N RRP1_N BAX_N ARC_N ERBB4_N
## 0.080345273 0.041362658 0.374905849 1.000000000 0.708290886
## nNOS_N Tau_N GFAP_N GluR3_N GluR4_N
## 0.683079589 0.234490417 -0.085638927 0.182006656 0.243438877
## IL1B_N P3525_N pCASP9_N PSD95_N SNCA_N
## 0.618194657 0.355734908 0.452368140 0.651626726 0.600497580
## Ubiquitin_N pGSK3B_Tyr216_N SHH_N BAD_N BCL2_N
## 0.705410687 0.044204014 0.383026252 -0.021234815 0.205289439
## pS6_N pCFOS_N SYP_N H3AcK18_N EGR1_N
## 1.000000000 0.055734370 0.395031542 0.203910599 0.224491255
## H3MeK4_N CaNA_N
## 0.199488168 -0.374123955
Yep. ARC_N and pS6_N are fully correlated, so, they are aliases. We haven’t any information about significance of every protein, so, drop last one - pS6_N - and revaluate the model.
fit <- update(fit, . ~ . -pS6_N)
summary(fit)
##
## Call:
## lm(formula = ERBB4_N ~ DYRK1A_N + ITSN1_N + BDNF_N + NR1_N +
## NR2A_N + pAKT_N + pBRAF_N + pCAMKII_N + pCREB_N + pELK_N +
## pERK_N + pJNK_N + PKCA_N + pMEK_N + pNR1_N + pNR2A_N + pNR2B_N +
## pPKCAB_N + pRSK_N + AKT_N + BRAF_N + CAMKII_N + CREB_N +
## ELK_N + ERK_N + GSK3B_N + JNK_N + MEK_N + TRKA_N + RSK_N +
## APP_N + Bcatenin_N + SOD1_N + MTOR_N + P38_N + pMTOR_N +
## DSCR1_N + AMPKA_N + NR2B_N + pNUMB_N + RAPTOR_N + TIAM1_N +
## pP70S6_N + NUMB_N + P70S6_N + pGSK3B_N + pPKCG_N + CDK5_N +
## S6_N + ADARB1_N + AcetylH3K9_N + RRP1_N + BAX_N + ARC_N +
## nNOS_N + Tau_N + GFAP_N + GluR3_N + GluR4_N + IL1B_N + P3525_N +
## pCASP9_N + PSD95_N + SNCA_N + Ubiquitin_N + pGSK3B_Tyr216_N +
## SHH_N + BAD_N + BCL2_N + pCFOS_N + SYP_N + H3AcK18_N + EGR1_N +
## H3MeK4_N + CaNA_N, data = proteins_scale)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.5286 -0.2236 -0.0084 0.2038 2.1204
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.018686 0.030373 -0.615 0.538703
## DYRK1A_N -0.112916 0.123677 -0.913 0.361710
## ITSN1_N 0.135706 0.176665 0.768 0.442776
## BDNF_N 0.125598 0.062889 1.997 0.046378 *
## NR1_N -0.324746 0.154046 -2.108 0.035544 *
## NR2A_N -0.006494 0.084269 -0.077 0.938610
## pAKT_N 0.219939 0.084104 2.615 0.009203 **
## pBRAF_N -0.073621 0.055001 -1.339 0.181356
## pCAMKII_N -0.001247 0.068353 -0.018 0.985451
## pCREB_N -0.130058 0.062926 -2.067 0.039290 *
## pELK_N 0.001466 0.031915 0.046 0.963391
## pERK_N -0.455159 0.180720 -2.519 0.012109 *
## pJNK_N -0.208138 0.087537 -2.378 0.017814 *
## PKCA_N 0.029035 0.094839 0.306 0.759625
## pMEK_N 0.026029 0.082902 0.314 0.753676
## pNR1_N -0.200848 0.104398 -1.924 0.054968 .
## pNR2A_N 0.125305 0.090307 1.388 0.165927
## pNR2B_N 0.346375 0.119623 2.896 0.003959 **
## pPKCAB_N 0.183064 0.088648 2.065 0.039457 *
## pRSK_N 0.044267 0.063137 0.701 0.483573
## AKT_N -0.005697 0.069193 -0.082 0.934413
## BRAF_N 0.222600 0.174199 1.278 0.201926
## CAMKII_N -0.011911 0.069865 -0.170 0.864700
## CREB_N -0.025378 0.054087 -0.469 0.639135
## ELK_N 0.078202 0.082869 0.944 0.345811
## ERK_N 0.202047 0.121582 1.662 0.097208 .
## GSK3B_N -0.094835 0.110826 -0.856 0.392588
## JNK_N -0.026715 0.075987 -0.352 0.725317
## MEK_N 0.023943 0.059893 0.400 0.689510
## TRKA_N 0.045878 0.131966 0.348 0.728257
## RSK_N -0.042932 0.073088 -0.587 0.557214
## APP_N -0.021694 0.074076 -0.293 0.769751
## Bcatenin_N 0.032010 0.153167 0.209 0.834546
## SOD1_N -0.116180 0.074190 -1.566 0.118018
## MTOR_N 0.180821 0.071743 2.520 0.012048 *
## P38_N -0.095941 0.079871 -1.201 0.230267
## pMTOR_N -0.077431 0.062293 -1.243 0.214475
## DSCR1_N -0.045716 0.068264 -0.670 0.503374
## AMPKA_N 0.104715 0.095270 1.099 0.272263
## NR2B_N 0.043254 0.053527 0.808 0.419449
## pNUMB_N -0.027045 0.083900 -0.322 0.747326
## RAPTOR_N 0.170702 0.088604 1.927 0.054629 .
## TIAM1_N -0.146606 0.087340 -1.679 0.093892 .
## pP70S6_N 0.028382 0.054994 0.516 0.606022
## NUMB_N -0.064220 0.073553 -0.873 0.383042
## P70S6_N -0.049965 0.058704 -0.851 0.395118
## pGSK3B_N 0.165450 0.052053 3.179 0.001577 **
## pPKCG_N -0.293093 0.075289 -3.893 0.000113 ***
## CDK5_N 0.001764 0.024323 0.073 0.942225
## S6_N 0.142854 0.069736 2.049 0.041059 *
## ADARB1_N -0.062631 0.048972 -1.279 0.201557
## AcetylH3K9_N 0.025985 0.135676 0.192 0.848199
## RRP1_N -0.031502 0.021968 -1.434 0.152232
## BAX_N -0.162854 0.044251 -3.680 0.000260 ***
## ARC_N 0.159773 0.063924 2.499 0.012775 *
## nNOS_N -0.007853 0.047195 -0.166 0.867918
## Tau_N 0.324281 0.082865 3.913 0.000104 ***
## GFAP_N -0.041293 0.047418 -0.871 0.384291
## GluR3_N -0.016512 0.056395 -0.293 0.769812
## GluR4_N -0.125414 0.060173 -2.084 0.037672 *
## IL1B_N 0.154896 0.058877 2.631 0.008794 **
## P3525_N 0.099458 0.048249 2.061 0.039810 *
## pCASP9_N 0.133665 0.050126 2.667 0.007923 **
## PSD95_N 0.401534 0.064325 6.242 9.55e-10 ***
## SNCA_N -0.018225 0.055822 -0.326 0.744195
## Ubiquitin_N 0.026834 0.056238 0.477 0.633468
## pGSK3B_Tyr216_N 0.175747 0.062933 2.793 0.005439 **
## SHH_N 0.094333 0.038435 2.454 0.014471 *
## BAD_N -0.070404 0.068748 -1.024 0.306311
## BCL2_N 0.011231 0.056615 0.198 0.842840
## pCFOS_N -0.019510 0.049260 -0.396 0.692240
## SYP_N 0.059670 0.047577 1.254 0.210395
## H3AcK18_N 0.009463 0.099598 0.095 0.924349
## EGR1_N -0.016129 0.070213 -0.230 0.818406
## H3MeK4_N 0.045206 0.079340 0.570 0.569097
## CaNA_N -0.036804 0.071937 -0.512 0.609159
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3905 on 476 degrees of freedom
## Multiple R-squared: 0.8641, Adjusted R-squared: 0.8427
## F-statistic: 40.37 on 75 and 476 DF, p-value: < 2.2e-16
Next step - remove multicollinearity
vif(fit)
## DYRK1A_N ITSN1_N BDNF_N NR1_N NR2A_N
## 23.736607 59.473637 16.669610 96.311682 25.127918
## pAKT_N pBRAF_N pCAMKII_N pCREB_N pELK_N
## 21.880134 9.671403 19.251803 17.421324 2.584965
## pERK_N pJNK_N PKCA_N pMEK_N pNR1_N
## 55.739024 26.508561 38.653895 23.634116 44.380168
## pNR2A_N pNR2B_N pPKCAB_N pRSK_N AKT_N
## 33.053974 55.826515 33.036076 15.913962 17.993694
## BRAF_N CAMKII_N CREB_N ELK_N ERK_N
## 41.690503 16.062122 9.992444 29.844699 55.249434
## GSK3B_N JNK_N MEK_N TRKA_N RSK_N
## 32.819856 18.997732 15.629309 72.480935 16.236937
## APP_N Bcatenin_N SOD1_N MTOR_N P38_N
## 21.563500 91.994091 20.108411 17.860768 19.953844
## pMTOR_N DSCR1_N AMPKA_N NR2B_N pNUMB_N
## 17.905757 14.567686 27.108067 11.017845 20.067495
## RAPTOR_N TIAM1_N pP70S6_N NUMB_N P70S6_N
## 19.127590 21.793679 9.784679 23.482651 13.516220
## pGSK3B_N pPKCG_N CDK5_N S6_N ADARB1_N
## 12.102837 22.052092 2.976573 17.503666 9.335931
## AcetylH3K9_N RRP1_N BAX_N ARC_N nNOS_N
## 42.778299 1.716449 8.071101 14.624584 9.589675
## Tau_N GFAP_N GluR3_N GluR4_N IL1B_N
## 17.829428 5.422842 10.719066 8.004983 9.575112
## P3525_N pCASP9_N PSD95_N SNCA_N Ubiquitin_N
## 7.896508 9.082792 14.501625 9.291047 10.532632
## pGSK3B_Tyr216_N SHH_N BAD_N BCL2_N pCFOS_N
## 11.355079 5.101022 13.413137 8.480518 6.364879
## SYP_N H3AcK18_N EGR1_N H3MeK4_N CaNA_N
## 9.429731 36.335690 13.981307 15.299243 19.739341
Lots and lots of collinear predictors. Bad for us. Try to make auto drop for every predictor until vif() will be less than 2.
refit <- function(df){
## God, I hate update() function: it works wrong, when you try to pass a name of
## predictor to remove, and do not return an error, where it must.
predictors <- colnames(df)
# remove alias and our protein
predictors <- predictors[predictors != "ERBB4_N"]
predictors <- predictors[predictors != "pS6_N"]
# create lm
fit <- lm(as.formula(paste("ERBB4_N ~ ", paste0(predictors, collapse="+"))), data = df)
vif <- vif(fit)
# predictor with highest VIF
predictor <- vif[vif == max(vif)]
while(predictor >= 2){
print(paste(names(predictor), "- removed with VIF ", unname(predictor)))
predictors <- predictors[predictors != names(predictor)]
formula <- paste("ERBB4_N ~ ", paste(predictors, collapse="+"),sep = "")
fit <- lm(formula, data = df)
vif <- vif(fit)
predictor <- vif[vif == max(vif(fit))]
}
predictors
}
good_predictors <- refit(proteins_scale)
## [1] "NR1_N - removed with VIF 96.3116818563355"
## [1] "Bcatenin_N - removed with VIF 80.185395754743"
## [1] "TRKA_N - removed with VIF 68.7125497229012"
## [1] "ITSN1_N - removed with VIF 57.1982298340348"
## [1] "pERK_N - removed with VIF 51.2405869584966"
## [1] "pNR2B_N - removed with VIF 46.8357130093968"
## [1] "AcetylH3K9_N - removed with VIF 41.5845752162506"
## [1] "ERK_N - removed with VIF 39.7205044147796"
## [1] "PKCA_N - removed with VIF 37.3065887090433"
## [1] "ELK_N - removed with VIF 28.116283092608"
## [1] "pNR1_N - removed with VIF 23.3735321503139"
## [1] "GSK3B_N - removed with VIF 22.6660059168382"
## [1] "AMPKA_N - removed with VIF 21.2365225537289"
## [1] "pJNK_N - removed with VIF 21.0481571552638"
## [1] "NUMB_N - removed with VIF 20.8675977435915"
## [1] "pPKCAB_N - removed with VIF 18.9821861663082"
## [1] "DYRK1A_N - removed with VIF 18.0752682287076"
## [1] "pAKT_N - removed with VIF 17.2691438495695"
## [1] "JNK_N - removed with VIF 16.7194557418703"
## [1] "RAPTOR_N - removed with VIF 16.2282384000947"
## [1] "MTOR_N - removed with VIF 15.8528103748328"
## [1] "CaNA_N - removed with VIF 15.4444949118682"
## [1] "NR2A_N - removed with VIF 15.343761866049"
## [1] "pMEK_N - removed with VIF 13.7359014796248"
## [1] "H3MeK4_N - removed with VIF 13.3509829243519"
## [1] "pPKCG_N - removed with VIF 13.0995724187498"
## [1] "BDNF_N - removed with VIF 12.9091231449963"
## [1] "TIAM1_N - removed with VIF 12.6995851122132"
## [1] "pMTOR_N - removed with VIF 12.6015242679877"
## [1] "ARC_N - removed with VIF 12.3383491154238"
## [1] "MEK_N - removed with VIF 11.923004066665"
## [1] "EGR1_N - removed with VIF 11.8167260554141"
## [1] "CAMKII_N - removed with VIF 10.6323500515232"
## [1] "pCREB_N - removed with VIF 10.3260375314912"
## [1] "pNR2A_N - removed with VIF 9.52985009727428"
## [1] "NR2B_N - removed with VIF 8.50935947978871"
## [1] "CREB_N - removed with VIF 7.90086554849432"
## [1] "Ubiquitin_N - removed with VIF 7.66557004242396"
## [1] "pRSK_N - removed with VIF 7.3979818344419"
## [1] "BAD_N - removed with VIF 7.13628175472155"
## [1] "Tau_N - removed with VIF 6.89207485191464"
## [1] "pNUMB_N - removed with VIF 6.54094589915653"
## [1] "S6_N - removed with VIF 6.37878140046559"
## [1] "pBRAF_N - removed with VIF 6.28454019125777"
## [1] "pGSK3B_N - removed with VIF 5.76002644992395"
## [1] "GluR3_N - removed with VIF 5.37980828624963"
## [1] "P70S6_N - removed with VIF 4.83423607678496"
## [1] "P38_N - removed with VIF 4.57682740619134"
## [1] "nNOS_N - removed with VIF 4.45584083654734"
## [1] "IL1B_N - removed with VIF 4.19530094810221"
## [1] "AKT_N - removed with VIF 4.04897235157331"
## [1] "BCL2_N - removed with VIF 3.81577483065807"
## [1] "BRAF_N - removed with VIF 3.71514077587936"
## [1] "PSD95_N - removed with VIF 3.35832664299129"
## [1] "P3525_N - removed with VIF 3.20195587736889"
## [1] "SYP_N - removed with VIF 3.08932735156774"
## [1] "pCFOS_N - removed with VIF 2.83206948086796"
## [1] "BAX_N - removed with VIF 2.55337975610498"
## [1] "GluR4_N - removed with VIF 2.53439354905622"
## [1] "APP_N - removed with VIF 2.3867554632591"
## [1] "pGSK3B_Tyr216_N - removed with VIF 2.06324072435886"
print(good_predictors)
## [1] "pCAMKII_N" "pELK_N" "RSK_N" "SOD1_N" "DSCR1_N" "pP70S6_N"
## [7] "CDK5_N" "ADARB1_N" "RRP1_N" "GFAP_N" "pCASP9_N" "SNCA_N"
## [13] "SHH_N" "H3AcK18_N"
Recalculate reduced LM with good predictors only
formula <- paste("ERBB4_N ~ ", paste(good_predictors, collapse="+"),sep = "")
fit_red <- lm(formula, data = proteins_scale)
summary(fit_red)
##
## Call:
## lm(formula = formula, data = proteins_scale)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.13486 -0.35001 0.01093 0.34896 2.30041
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.05471 0.02751 -1.989 0.04724 *
## pCAMKII_N 0.06976 0.03160 2.207 0.02772 *
## pELK_N 0.04066 0.03405 1.194 0.23291
## RSK_N 0.19523 0.03548 5.502 5.81e-08 ***
## SOD1_N 0.07818 0.03230 2.421 0.01582 *
## DSCR1_N -0.06860 0.03495 -1.963 0.05019 .
## pP70S6_N -0.06879 0.03473 -1.981 0.04814 *
## CDK5_N 0.06013 0.02689 2.236 0.02575 *
## ADARB1_N 0.28033 0.02841 9.867 < 2e-16 ***
## RRP1_N -0.02269 0.02807 -0.808 0.41922
## GFAP_N -0.14566 0.04048 -3.598 0.00035 ***
## pCASP9_N 0.43074 0.03050 14.121 < 2e-16 ***
## SNCA_N 0.12312 0.03738 3.294 0.00105 **
## SHH_N 0.25811 0.03022 8.540 < 2e-16 ***
## H3AcK18_N 0.27615 0.03124 8.840 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5753 on 537 degrees of freedom
## Multiple R-squared: 0.6672, Adjusted R-squared: 0.6585
## F-statistic: 76.91 on 14 and 537 DF, p-value: < 2.2e-16
vif(fit_red)
## pCAMKII_N pELK_N RSK_N SOD1_N DSCR1_N pP70S6_N CDK5_N ADARB1_N
## 1.895677 1.354908 1.762407 1.755229 1.759048 1.797288 1.675283 1.447068
## RRP1_N GFAP_N pCASP9_N SNCA_N SHH_N H3AcK18_N
## 1.290564 1.820407 1.549181 1.918910 1.452835 1.646402
Remove non-significant and predictors until all of them are significant at 0.05 or less (one by one, but in code next it done in bulk for shortage)
fit_red <- fit_red %>% update(.~.-pELK_N -RRP1_N)
fit_red <- fit_red %>% update(.~.-DSCR1_N -pCAMKII_N -pP70S6_N)
summary(fit_red)
##
## Call:
## lm(formula = ERBB4_N ~ RSK_N + SOD1_N + CDK5_N + ADARB1_N + GFAP_N +
## pCASP9_N + SNCA_N + SHH_N + H3AcK18_N, data = proteins_scale)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.15625 -0.34013 -0.00287 0.35036 2.40904
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.04841 0.02711 -1.785 0.07476 .
## RSK_N 0.17514 0.02975 5.888 6.88e-09 ***
## SOD1_N 0.08899 0.03220 2.764 0.00591 **
## CDK5_N 0.05865 0.02627 2.232 0.02602 *
## ADARB1_N 0.29353 0.02612 11.239 < 2e-16 ***
## GFAP_N -0.16036 0.03786 -4.236 2.67e-05 ***
## pCASP9_N 0.45504 0.02786 16.334 < 2e-16 ***
## SNCA_N 0.10454 0.03563 2.934 0.00348 **
## SHH_N 0.25409 0.02987 8.508 < 2e-16 ***
## H3AcK18_N 0.26532 0.02999 8.848 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5783 on 542 degrees of freedom
## Multiple R-squared: 0.6607, Adjusted R-squared: 0.6551
## F-statistic: 117.3 on 9 and 542 DF, p-value: < 2.2e-16
Our model is not good (explain just 0.66 of all variance), but let’s test it - perhapse, it isn’t useful at all.
fortified_fit_red <- data.frame(fortify(fit_red), proteins_scale)
gg_resid <- ggplot(data = fortified_fit_red, aes(x = .fitted, y = .stdresid)) +
geom_point() +
geom_hline(yintercept = 0) +
geom_smooth(method = "lm") +
geom_hline(yintercept = 2, color = "red") +
geom_hline(yintercept = -2, color = "red")
gg_resid
## `geom_smooth()` using formula 'y ~ x'
Residuals demonstrate heavy outliers. Model should be corrected (perhaps, add back some of discarded predictors)
ggplot(fortified_fit_red, aes(x = 1:nrow(fortified_fit_red), y = .cooksd)) +
geom_bar(stat = "identity") +
geom_hline(yintercept = 2, color = "red")
Ok, no overweighted values
qqPlot(fortified_fit_red$.stdresid)
## [1] 37 88
Not very good, but looks like
predictors <- colnames(proteins_scale %>% dplyr::select(-pS6_N, -ERBB4_N))
dropped_predictors <- predictors[!(predictors %in% good_predictors)]
draw_it <- function(i){
gg_resid + aes(x = fortified_fit_red[,i]) + xlab(i) + theme(axis.title.y=element_blank())
}
plots <- lapply(dropped_predictors, draw_it)
ggarrange(plotlist = plots, ncol = 4, nrow = 3)
## $`1`
##
## $`2`
##
## $`3`
##
## $`4`
##
## $`5`
##
## $`6`
##
## attr(,"class")
## [1] "list" "ggarrange"
Okay, there is correlation with several excluded predictors (for example, PSD95_N, pCFOS_N and other), which should be included back to model again. But we will not do it here.
We have lots of multicollinear predictors in a dataset. But even we drop 3/4 of them, we have model with quite large residuals, and our best result explain about 0.66 of all variability - so, I think, LM isn’t good choice.
Make an ordinations
pca <- rda(proteins_scale)
head(summary(pca))
##
## Call:
## rda(X = proteins_scale)
##
## Partitioning of variance:
## Inertia Proportion
## Total 72.73 1
## Unconstrained 72.73 1
##
## Eigenvalues, and their contribution to the variance
##
## Importance of components:
## PC1 PC2 PC3 PC4 PC5 PC6 PC7
## Eigenvalue 23.7479 11.7196 7.13238 6.60687 2.70290 2.68669 2.23181
## Proportion Explained 0.3265 0.1611 0.09806 0.09084 0.03716 0.03694 0.03068
## Cumulative Proportion 0.3265 0.4876 0.58570 0.67654 0.71370 0.75064 0.78132
## PC8 PC9 PC10 PC11 PC12 PC13 PC14
## Eigenvalue 1.74769 1.65562 1.24899 1.05229 0.95354 0.87466 0.721917
## Proportion Explained 0.02403 0.02276 0.01717 0.01447 0.01311 0.01203 0.009926
## Cumulative Proportion 0.80535 0.82812 0.84529 0.85976 0.87287 0.88489 0.894817
## PC15 PC16 PC17 PC18 PC19 PC20
## Eigenvalue 0.590981 0.549009 0.468580 0.457849 0.428005 0.398369
## Proportion Explained 0.008125 0.007548 0.006442 0.006295 0.005885 0.005477
## Cumulative Proportion 0.902942 0.910490 0.916933 0.923228 0.929112 0.934590
## PC21 PC22 PC23 PC24 PC25 PC26
## Eigenvalue 0.358522 0.329703 0.304326 0.284594 0.229786 0.219260
## Proportion Explained 0.004929 0.004533 0.004184 0.003913 0.003159 0.003015
## Cumulative Proportion 0.939519 0.944052 0.948236 0.952149 0.955308 0.958323
## PC27 PC28 PC29 PC30 PC31 PC32
## Eigenvalue 0.202036 0.192831 0.161839 0.159576 0.141616 0.127370
## Proportion Explained 0.002778 0.002651 0.002225 0.002194 0.001947 0.001751
## Cumulative Proportion 0.961101 0.963752 0.965977 0.968171 0.970118 0.971869
## PC33 PC34 PC35 PC36 PC37 PC38
## Eigenvalue 0.123317 0.119153 0.111057 0.103473 0.09236 0.08948
## Proportion Explained 0.001695 0.001638 0.001527 0.001423 0.00127 0.00123
## Cumulative Proportion 0.973565 0.975203 0.976730 0.978152 0.97942 0.98065
## PC39 PC40 PC41 PC42 PC43 PC44
## Eigenvalue 0.084482 0.080133 0.076943 0.076131 0.0688252 0.0659858
## Proportion Explained 0.001162 0.001102 0.001058 0.001047 0.0009463 0.0009072
## Cumulative Proportion 0.981814 0.982916 0.983974 0.985020 0.9859666 0.9868738
## PC45 PC46 PC47 PC48 PC49
## Eigenvalue 0.0633949 0.0627262 0.0593197 0.0526344 0.0510792
## Proportion Explained 0.0008716 0.0008624 0.0008156 0.0007237 0.0007023
## Cumulative Proportion 0.9877455 0.9886079 0.9894234 0.9901471 0.9908494
## PC50 PC51 PC52 PC53 PC54
## Eigenvalue 0.0479737 0.0452658 0.0441243 0.0428464 0.0409369
## Proportion Explained 0.0006596 0.0006224 0.0006067 0.0005891 0.0005628
## Cumulative Proportion 0.9915090 0.9921313 0.9927380 0.9933271 0.9938899
## PC55 PC56 PC57 PC58 PC59
## Eigenvalue 0.0367938 0.0359695 0.0340511 0.0325540 0.0308476
## Proportion Explained 0.0005059 0.0004945 0.0004682 0.0004476 0.0004241
## Cumulative Proportion 0.9943958 0.9948903 0.9953585 0.9958061 0.9962302
## PC60 PC61 PC62 PC63 PC64
## Eigenvalue 0.0291207 0.0266732 0.0252606 0.0226115 0.0215873
## Proportion Explained 0.0004004 0.0003667 0.0003473 0.0003109 0.0002968
## Cumulative Proportion 0.9966306 0.9969973 0.9973446 0.9976555 0.9979523
## PC65 PC66 PC67 PC68 PC69
## Eigenvalue 0.0191436 0.0188885 0.0180865 0.0165203 0.0146492
## Proportion Explained 0.0002632 0.0002597 0.0002487 0.0002271 0.0002014
## Cumulative Proportion 0.9982155 0.9984752 0.9987239 0.9989510 0.9991524
## PC70 PC71 PC72 PC73 PC74
## Eigenvalue 0.0127597 0.012004 0.0097787 0.0094075 6.621e-03
## Proportion Explained 0.0001754 0.000165 0.0001344 0.0001293 9.103e-05
## Cumulative Proportion 0.9993279 0.999493 0.9996273 0.9997567 9.998e-01
## PC75 PC76
## Eigenvalue 6.205e-03 4.871e-03
## Proportion Explained 8.531e-05 6.697e-05
## Cumulative Proportion 9.999e-01 1.000e+00
##
## Scaling 2 for species and site scores
## * Species are scaled proportional to eigenvalues
## * Sites are unscaled: weighted dispersion equal on all dimensions
## * General scaling constant of scores: 14.14884
##
##
## Species scores
##
## PC1 PC2 PC3 PC4 PC5 PC6
## DYRK1A_N -0.2295 -0.6686 0.06313 -0.37251 0.26677 -0.009456
## ITSN1_N -0.4979 -0.7818 0.03282 -0.26909 0.34936 0.100461
## BDNF_N -1.5676 -0.3441 0.06403 -0.44447 0.15261 -0.066180
## NR1_N -1.5706 -0.2778 0.51765 -0.01343 -0.11566 0.075059
## NR2A_N -1.3212 -0.3145 0.67291 -0.05494 -0.04272 -0.015097
## pAKT_N -0.9396 0.8154 -0.50893 -0.42398 -0.21462 0.216800
## ....
##
##
## Site scores (weighted sums of species scores)
##
## PC1 PC2 PC3 PC4 PC5 PC6
## 76 -0.7937 -0.5367 0.4364 0.4141 0.2217 -0.8140
## 77 -0.7374 -0.4628 0.3746 0.5014 0.3151 -1.0306
## 78 -0.8675 -0.4535 0.3847 0.3733 0.4134 -0.9041
## 79 -0.2998 -0.5313 0.3439 0.1274 -0.4611 -0.5489
## 80 -0.3628 -0.4460 0.1840 0.2636 -0.2013 -0.8447
## 81 -0.3663 -0.4273 0.1751 0.3349 -0.1623 -0.7517
## ....
Plot factors’ biplot and total biplot
biplot(pca, scaling = "species", display = "species")
biplot(pca)
Plot explained by PCA proportions (just for first 10 axes)
pca_summary <- summary(pca)
pca_result <- as.data.frame(pca_summary$cont)
plot_data <- as.data.frame(t(as.matrix(pca_result[c("Proportion Explained"),])))
plot_data$component <- rownames(plot_data)
plot_data$component <- factor(plot_data$component , levels = plot_data$component)
ggplot(plot_data[1:10,], aes(component, `Proportion Explained`)) +
geom_bar(stat = "identity") +
scale_x_discrete(guide = guide_axis(angle = 90))
Draw plots, and 3D plot for first 3 axes
df_scores <- data.frame(scores(pca, display = "sites", choices = c(1, 2, 3), scaling = "sites"), data %>% na.omit)
p_genotype <- ggplot(df_scores, aes(x = PC1, y = PC2)) +
geom_point(aes(color = Genotype), alpha = 0.5)
p_treatment <- ggplot(df_scores, aes(x = PC1, y = PC2)) +
geom_point(aes(color = Treatment), alpha = 0.5)
p_behavior <- ggplot(df_scores, aes(x = PC1, y = PC2)) +
geom_point(aes(color = Behavior), alpha = 0.5)
ggarrange(p_genotype, p_treatment, p_behavior, nrow = 2, ncol = 2)
plot_ly(x=df_scores$PC1, y=df_scores$PC2, z=df_scores$PC3, color = df_scores$class, colors = "Dark2", type="scatter3d", mode="markers", size = 1) %>%
layout(title = "PCA 3d scatter plot",
scene = list(
xaxis = list(title = "PC1"),
yaxis = list(title = "PC2"),
zaxis = list(title = "PC3")
))
## Warning: `arrange_()` is deprecated as of dplyr 0.7.0.
## Please use `arrange()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.
DeSEQ2 works only with integer numbers, so the minimum value from dataset is 0.0525284, maximum is 8.4825534. If we multipe it to 20 and round, minimum will be around 1. (Actually, I am not sure, that this rough transformation doesn’t ruin all data…)
Let’s find out, where we can find significant difference in protein abundance according metadata
d_data <- data.frame(t(as.matrix(data %>% na.omit %>% keep(is.numeric)))) %>%
mutate(across(where(is.numeric), function(x){round(100*x)}))
d_metadata <- data %>% na.omit %>% keep(is.factor)
colnames(d_data) <- d_metadata$MouseID
rownames(d_data) <- colnames(proteins_scale)
d_data[1:10,1:10]
## 3415_1 3415_2 3415_3 3415_4 3415_5 3415_6 3415_7 3415_8 3415_9
## DYRK1A_N 65 62 64 58 54 57 49 49 51
## ITSN1_N 83 84 85 76 76 76 67 63 66
## BDNF_N 41 39 40 35 35 34 31 30 31
## NR1_N 292 286 297 262 263 260 244 232 242
## NR2A_N 517 519 535 473 474 476 426 415 434
## pAKT_N 21 22 21 21 21 20 23 20 21
## pBRAF_N 18 17 17 16 17 16 16 15 15
## pCAMKII_N 373 365 381 378 387 384 373 357 383
## pCREB_N 24 22 22 19 19 19 19 16 17
## pELK_N 167 157 174 151 153 158 131 129 143
## 3415_10
## DYRK1A_N 41
## ITSN1_N 54
## BDNF_N 27
## NR1_N 203
## NR2A_N 339
## pAKT_N 22
## pBRAF_N 16
## pCAMKII_N 307
## pCREB_N 15
## pELK_N 117
d_metadata[1:10,]
## # A tibble: 10 x 5
## MouseID Genotype Treatment Behavior class
## <fct> <fct> <fct> <fct> <fct>
## 1 3415_1 Control Memantine C/S c-CS-m
## 2 3415_2 Control Memantine C/S c-CS-m
## 3 3415_3 Control Memantine C/S c-CS-m
## 4 3415_4 Control Memantine C/S c-CS-m
## 5 3415_5 Control Memantine C/S c-CS-m
## 6 3415_6 Control Memantine C/S c-CS-m
## 7 3415_7 Control Memantine C/S c-CS-m
## 8 3415_8 Control Memantine C/S c-CS-m
## 9 3415_9 Control Memantine C/S c-CS-m
## 10 3415_10 Control Memantine C/S c-CS-m
significant_proteins <- function(model){
dds <- DESeqDataSetFromMatrix(countData=d_data,
colData=d_metadata,
design=model)
dds <- DESeq(dds)
res <- data.frame(results(dds))
rownames(res) <- rownames(d_data)
res <- res[res$padj < 0.05,]
rownames(res)
}
significant_proteins(~Behavior)
## [1] "DYRK1A_N" "ITSN1_N" "BDNF_N" "NR1_N"
## [5] "NR2A_N" "pAKT_N" "pBRAF_N" "pCAMKII_N"
## [9] "pCREB_N" "pELK_N" "pERK_N" "pJNK_N"
## [13] "PKCA_N" "pMEK_N" "pNR2A_N" "pPKCAB_N"
## [17] "pRSK_N" "AKT_N" "BRAF_N" "CAMKII_N"
## [21] "CREB_N" "ELK_N" "ERK_N" "GSK3B_N"
## [25] "TRKA_N" "APP_N" "SOD1_N" "MTOR_N"
## [29] "P38_N" "pMTOR_N" "DSCR1_N" "NR2B_N"
## [33] "pNUMB_N" "RAPTOR_N" "pP70S6_N" "NUMB_N"
## [37] "pGSK3B_N" "pPKCG_N" "CDK5_N" "S6_N"
## [41] "ADARB1_N" "ARC_N" "ERBB4_N" "nNOS_N"
## [45] "Tau_N" "GFAP_N" "GluR3_N" "IL1B_N"
## [49] "pCASP9_N" "PSD95_N" "SNCA_N" "Ubiquitin_N"
## [53] "pGSK3B_Tyr216_N" "SHH_N" "BAD_N" "BCL2_N"
## [57] "pS6_N" "H3AcK18_N" "EGR1_N" "H3MeK4_N"
## [61] "CaNA_N"
significant_proteins(~Treatment)
## [1] "DYRK1A_N" "ITSN1_N" "NR1_N" "pAKT_N" "pCAMKII_N"
## [6] "pERK_N" "pJNK_N" "pNR2A_N" "pPKCAB_N" "pRSK_N"
## [11] "AKT_N" "BRAF_N" "ELK_N" "ERK_N" "TRKA_N"
## [16] "Bcatenin_N" "SOD1_N" "P38_N" "pMTOR_N" "DSCR1_N"
## [21] "NR2B_N" "pP70S6_N" "NUMB_N" "pGSK3B_N" "pPKCG_N"
## [26] "CDK5_N" "ADARB1_N" "AcetylH3K9_N" "BAX_N" "NA"
## [31] "NA.1" "GluR3_N" "NA.2" "IL1B_N" "P3525_N"
## [36] "PSD95_N" "Ubiquitin_N" "NA.3" "NA.4" "SYP_N"
## [41] "H3AcK18_N" "EGR1_N" "CaNA_N"
significant_proteins(~Genotype)
## [1] "DYRK1A_N" "ITSN1_N" "NR1_N" "NR2A_N"
## [5] "pCREB_N" "pERK_N" "pNR1_N" "pNR2A_N"
## [9] "pNR2B_N" "pRSK_N" "AKT_N" "BRAF_N"
## [13] "GSK3B_N" "APP_N" "Bcatenin_N" "SOD1_N"
## [17] "MTOR_N" "P38_N" "pMTOR_N" "AMPKA_N"
## [21] "NR2B_N" "RAPTOR_N" "pP70S6_N" "pPKCG_N"
## [25] "S6_N" "AcetylH3K9_N" "ARC_N" "Tau_N"
## [29] "GluR3_N" "IL1B_N" "pCASP9_N" "PSD95_N"
## [33] "SNCA_N" "pGSK3B_Tyr216_N" "pS6_N" "pCFOS_N"
## [37] "SYP_N" "H3AcK18_N" "EGR1_N" "CaNA_N"